* Root folder that holds the input .dta files -- edit this before running.
local dir "YOUR PATH HERE"
* Load the person-by-year panel. A forward slash is used as the separator
* because Stata accepts it on Windows, macOS and Linux, whereas a backslash
* is only a valid path separator on Windows.
use "`dir'/nlsy97_learning_panel.dta", clear
*Education groups (both flags are 0, not missing, when educ is missing)
gen hs_less = inrange(educ, 1, 12)
gen ba_deg = inrange(educ, 16, 20)

*Actual work experience: total minus youth experience,
*capped at 23 and rounded to the nearest whole number
gen work_exp = wkexp_yr_total - wkexp_yr_youth
replace work_exp = 23 if work_exp != . & work_exp > 23
replace work_exp = round(work_exp)

*Two-year age bins: 21-22, 23-24, ..., 39-40
forvalues lo = 21(2)39 {
	local hi = `lo' + 1
	gen age_`lo'to`hi' = inrange(age, `lo', `hi')
}
*BA x age-bin interactions (built in a second pass so that the age bins
*stay contiguous in the variable order)
forvalues lo = 21(2)39 {
	local hi = `lo' + 1
	gen ba_age_`lo'to`hi' = ba_deg * age_`lo'to`hi'
}

*One indicator per single year of work experience, 1 through 23
forvalues k = 1(1)23 {
	gen workexp_`k' = (work_exp == `k')
}
*Two-year work-experience bins: 0-1, 2-3, ..., 22-23
forvalues lo = 0(2)22 {
	local hi = `lo' + 1
	gen workexp_`lo'to`hi' = inrange(work_exp, `lo', `hi')
}

*BA x work-experience-bin interactions
forvalues lo = 0(2)22 {
	local hi = `lo' + 1
	gen ba_workexp_`lo'to`hi' = ba_deg * workexp_`lo'to`hi'
}

*Declare the panel structure (person id by survey year) and sort accordingly
tsset id year
sort id year

*Reorder job variables chronologically, within wave
*Step 1: create empty placeholders <var>_j1 ... <var>_j5 for each job-level
*variable; they are filled further down, once the valid jobs have been counted
foreach x in wage hrs weeks occ occ1990dd empID {
	forvalues y=1(1)5 {
		gen `x'_j`y'=.
	}
}
*Even up missings - rule is that the job only counts if wage and empID are both nonmissing*
*Step 2a: blank every job-level variable of slot y when its employer ID is missing
foreach x in hrs weeks occ occ1990dd wage {
	forvalues y=1(1)5 {
		replace `x'`y'=. if empID`y'==.
	}
}
*Step 2b: blank every job-level variable of slot y when its wage is missing.
*weeks is included here as well, so that the two passes treat the same set of
*variables and a job with a missing wage carries no leftover weeks value.
foreach x in hrs weeks occ occ1990dd empID {
	forvalues y=1(1)5 {
		replace `x'`y'=. if wage`y'==.
	}
}
*Count, per wave, how many of the five employer-ID slots are nonmissing
local emp_slots
forvalues s = 1(1)5 {
	local emp_slots `emp_slots' empID`s'
}
egen job_count=rownonmiss(`emp_slots')

*Fill the reordered slots. For a wave with n counted jobs, target slot k
*(k = 1..n) takes its values from source slot n-k+1, i.e. the first n source
*slots are copied in reverse order. Target slots above n stay missing.
forvalues k = 1(1)5 {
	foreach x in wage hrs weeks occ occ1990dd empID {
		forvalues n = 5(-1)`k' {
			local src = `n' - `k' + 1
			replace `x'_j`k' = `x'`src' if job_count == `n'
		}
	}
}
*subtract current job count from measured year-to-year increase in jobs
*values greater than zero mean that the respondent kept the same job across waves
*zero or less means a switch
*(the three lines above are kept from the original script; the statements in
* this section only count jobs and build the career-year variables)

*Number of reordered job slots that are filled in each year
egen jobs_yr = rownonmiss(empID_j1 empID_j2 empID_j3 empID_j4 empID_j5)

*Career year = survey year relative to schl_max, defined for offsets -2 to 30
*and left missing otherwise
sort id year
gen career = .
forvalues gap = -2(1)30 {
	replace career = `gap' if year == schl_max + `gap'
}

*Career-year indicators: single years -2, -1, 1, 2, then two-year bins
*3-4 ... 21-22 (career year 0 has no indicator)
gen career_neg2 = (career == -2)
gen career_neg1 = (career == -1)
forvalues c = 1(1)2 {
	gen career_`c' = (career == `c')
}
forvalues lo = 3(2)21 {
	local hi = `lo' + 1
	gen career_`lo'to`hi' = inrange(career, `lo', `hi')
}

*BA x career-year interactions, for the indicators up to the 17-18 bin only
local ba_bins neg2 neg1 1 2
forvalues lo = 3(2)17 {
	local hi = `lo' + 1
	local ba_bins `ba_bins' `lo'to`hi'
}
foreach bin of local ba_bins {
	gen ba_career_`bin' = ba_deg * career_`bin'
}
*estimate, for each career year, the probabilities of having 1) same employer as last wave; 2) new employer; 3) no employer
*unemp: 1 when emp==0 (also 0 when emp is missing; reset to missing below)
gen unemp=emp==0

tsset id year
sort id year
*Despite the "_next" suffix, empID`i'_next holds the employer ID found in the
*same slot on the person's PREVIOUS row (observation _n-1 within id)
forvalues i=1(1)5 {
	gen empID`i'_next=empID`i'[_n-1] if id==id[_n-1]
}
*same_emp: 1 if any slot carries the same nonmissing employer ID as on the
*previous row, 0 if employed (emp==1) without such a match, missing otherwise
gen same_emp=.
forvalues i=1(1)5 {
	replace same_emp=1 if empID`i'==empID`i'_next & empID`i'!=.
}
replace same_emp=0 if emp==1 & same_emp!=1

*new_emp is referenced by the attrition loop below but is not created anywhere
*in this script. If the input data do not already provide it, build it here so
*the loop does not stop with "variable new_emp not found".
*NOTE(review): defined as "employed and not with the same employer"; confirm
*this matches the intended definition of a new employer.
capture confirm variable new_emp
if _rc {
	gen new_emp=(emp==1 & same_emp==0)
}


tsset id year
*Change in jobsnum relative to the person's previous row; a change of -1 is recoded to 0
gen jobsnum_next=jobsnum-jobsnum[_n-1] if id==id[_n-1]
replace jobsnum_next=0 if jobsnum_next==-1

*wkexp_yr scaled by 40; zeros are treated as missing
gen wkexp_wk=wkexp_yr*40
replace wkexp_wk=. if wkexp_wk==0
*set attrition to missing
foreach x in unemp same_emp new_emp {
	replace `x'=. if emp==.
}
*Occupation categories, built from occ1990dd code ranges
gen mgmt=inrange(occ1990dd, 4, 22)
*superv is 1 for the listed codes and missing (not 0) for everything else
gen superv=1 if inlist(occ1990dd, 243, 303, 433, 448, 450, 470, 503, 558, 628, 803, 823) | inrange(occ1990dd, 413, 415)
*Codes flagged by superv are counted as management and removed from the
*white/blue groups below
replace mgmt=1 if superv==1
gen prof=inrange(occ1990dd, 23, 235)
gen white=inrange(occ1990dd, 243, 389) & superv!=1
gen blue=inrange(occ1990dd, 405, 889) & superv!=1
*An unknown occupation code means an unknown category, not a zero
foreach x in mgmt prof white blue {
	replace `x'=. if occ1990dd==.
}
*Attach the occupation-level variables from the lookup file, keeping every row
*of the panel. A forward slash is used as the path separator because Stata
*accepts it on Windows, macOS and Linux; a backslash only works on Windows.
merge m:1 occ1990dd using "`dir'/routine_convert_2017.dta", keep(master match) nogen

*Person fixed-effects regressions on the career-year indicators, run
*separately for the HS-or-less and the BA samples. Career year 0 has no
*indicator and is therefore the omitted category.
xtset id
*Table A1
foreach x in jobsnum same_emp wkexp_wk wage {
	xtreg `x' career_neg2-career_21to22 if career<=22 & hs_less==1 & age<=40, fe vce(cluster id)
	xtreg `x' career_neg2-career_17to18 if career<=18 & ba_deg==1 & age<=40, fe vce(cluster id)

}

*Figure A8
*The HS sample is capped at career<=22, matching the last indicator
*(career_21to22) and the Table A1 specification. With only career!=. as the
*filter, career years 23-30 would have every indicator equal to 0 and would be
*pooled into the omitted category.
foreach x in mgmt prof white blue {
	xtreg `x' career_neg2-career_21to22 if career<=22 & hs_less==1 & age>=18 & age<=40, fe vce(cluster id)
	xtreg `x' career_neg2-career_17to18 if career<=18 & ba_deg==1 & age>=18 & age<=40, fe vce(cluster id)
}
*Figure 6
*Same sample restrictions as Figure A8
foreach x in routine_2017 {
	xtreg `x' career_neg2-career_21to22 if career<=22 & hs_less==1 & age>=18 & age<=40, fe vce(cluster id)
	xtreg `x' career_neg2-career_17to18 if career<=18 & ba_deg==1 & age>=18 & age<=40, fe vce(cluster id)
}

*Now create job panel
*Go from one row per id-year to one row per id-year-job slot; the new variable
*job takes the numeric suffix of the reordered <var>_j# variables
reshape long wage_j hrs_j occ_j occ1990dd_j weeks_j empID_j, i(id year) j(job)
*Blank the slot number for rows without both a wage and an employer ID
replace job=. if wage_j==. | empID_j==.
*Highest valid slot number within each id-year
bysort id year: egen job_max=max(job)
*Identifier for each job-slot x year combination
egen job_yr=group(job year)
*get rid of blanks first
*(rows without a wage or without weeks are dropped)
drop if wage_j==. | weeks_j==.

*Sum of job_count over the person's remaining rows (job_count is a wave-level
*value repeated on every job row of that wave)
bysort id: egen jobs_all=total(job_count)

*starting with first year, assign job ID to jobs 1-5 in order, among those with nonmissing wage/hrs/tenure
*count unique jobs
gen jobID=1
sort id year job
*unique job observation
*replace works through the rows in order, so jobID[_n-1] is the value already
*updated on the previous row: this produces a running counter 1, 2, 3, ... within id
replace jobID=jobID[_n-1]+1 if id==id[_n-1]
*account for same job, across survey waves
*A row inherits the previous row's jobID when empID1 is equal on both rows.
*NOTE(review): empID1 is the wave-level slot-1 variable, not the reshaped
*empID_j, so it is identical on every row of the same id-year (and two missing
*values also compare as equal). All job rows of one wave therefore share a
*jobID. Confirm that empID1, rather than empID_j, is intended here.
replace jobID=jobID[_n-1] if empID1==empID1[_n-1] & id==id[_n-1]
gen ln_wage_j=ln(wage_j)
*Identifier for each person x jobID x occupation cell; rows with a missing
*occ1990dd get a missing id_job_occ.
*NOTE(review): this uses the wave-level occ1990dd, not the job-level
*occ1990dd_j - confirm.
egen id_job_occ=group(id jobID occ1990dd)

*Total weeks and total hours summed over all rows of the same id-jobID
foreach x in weeks hrs {
	bysort id jobID: egen `x'_jobID=total(`x'_j)
}
*Figure A6

*Log-wage regressions on the job-by-year panel. Each education group is
*estimated twice: first with individual (id) fixed effects, then with
*id_job_occ fixed effects; standard errors are clustered on the same unit
*that defines the fixed effect.
local hs_sample "hs_less==1 & age>=19 & age<=40 & career!=. & career<=22 & id_job_occ!=."
local ba_sample "ba_deg==1 & age>=23 & age<=40 & career!=. & career<=18 & id_job_occ!=."

*HS or less
foreach fe_unit in id id_job_occ {
	xtset `fe_unit'
	xtreg ln_wage_j career_neg2-career_21to22 if `hs_sample', fe vce(cluster `fe_unit')
}
*BA or more
foreach fe_unit in id id_job_occ {
	xtset `fe_unit'
	xtreg ln_wage_j career_neg2-career_17to18 if `ba_sample', fe vce(cluster `fe_unit')
}

